#coding=utf-8
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt

# Directory holding the CIFAR-10 python batches on this machine.
cifar10_dir = '/home/zxy/PycharmProjects/cs231n/assignment1/cs231n/datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Sanity check: report the shape of every array that was just loaded.
for caption, array in (
    ('Training data shape', X_train),
    ('Training labels shape', y_train),
    ('Test data shape', X_test),
    ('Test labels shape', y_test),
):
    print(caption, array.shape)

# Show a few example images per class, laid out as a grid with one column per class.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
sample_per_class = 7
for class_id, class_name in enumerate(classes):
    # Positions of all training images whose label equals this class.
    candidates = np.flatnonzero(y_train == class_id)
    # Draw sample_per_class distinct positions (replace=False forbids duplicates).
    chosen = np.random.choice(candidates, sample_per_class, replace=False)
    for row, image_idx in enumerate(chosen):
        # Subplot indices run row by row, so stepping by num_classes moves down
        # one row while staying in this class's column.
        cell = row * num_classes + class_id + 1
        plt.subplot(sample_per_class, num_classes, cell)
        plt.imshow(X_train[image_idx].astype('uint8'))
        plt.axis('off')  # hide the axes
        if row == 0:
            # Only the top row carries the class name as its title.
            plt.title(class_name)
plt.show()

# Sizes of the splits carved out of the loaded arrays.
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 500

# Validation set: the num_validation samples that follow the training range.
# This must happen first, because X_train / y_train are truncated just below.
mask = range(num_training, num_training + num_validation)
X_val, y_val = X_train[mask], y_train[mask]

# Training set: the first num_training samples.
mask = range(num_training)
X_train, y_train = X_train[mask], y_train[mask]

# Development set: num_dev samples drawn from the training set without repetition.
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev, y_dev = X_train[mask], y_train[mask]

# Test set: the first num_test test samples.
mask = range(num_test)
X_test, y_test = X_test[mask], y_test[mask]

# Print the shape of each split.
for caption, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape ', y_val),
    ('Test data shape: ', X_test),
    ('Test label shape', y_test),
):
    print(caption, array.shape)

# Flatten every image into a single vector: one row per sample.
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)

# Confirm the flattened shapes.
for caption, array in (
    ('Training data shape', X_train),
    ('Validation data shape', X_val),
    ('Test data shape', X_test),
    ('Dev data shape', X_dev),
):
    print(caption, array.shape)

# Mean subtraction: the mean image is computed from the training split only.
mean_image = X_train.mean(axis=0)
print(mean_image[:10])  # peek at the first few entries

# Display the mean as a 32x32 RGB picture.
plt.figure(figsize=(4, 4))
mean_picture = mean_image.reshape(32, 32, 3).astype('uint8')
plt.imshow(mean_picture)
plt.show()

# Subtract the training mean from every split. The augmented assignment
# modifies each array in place, so the module-level names see the change.
for split in (X_train, X_val, X_test, X_dev):
    split -= mean_image

# Append a constant column of ones to every split so that the bias can be
# handled as one extra row of the weight matrix.
X_train, X_val, X_test, X_dev = [
    np.hstack([split, np.ones((split.shape[0], 1))])
    for split in (X_train, X_val, X_test, X_dev)
]

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

#使用SVM分类器进行分类
from cs231n.classifiers import linear_svm
import time

# Start from a small random SVM weight matrix: 3073 rows (32*32*3 pixel values
# plus the appended ones column) by 10 classes.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = linear_svm.svm_loss_native(W, X_dev, y_dev, 0.00001)
print('loss: %f' % loss)

# Compare the analytic gradient against a numerical estimate using the
# gradient-check helper that ships with cs231n.
from cs231n.gradient_check import grad_check_sparse

# Gradient check without L2 regularisation.
# The analytic gradient is recomputed with reg=0.0 so that it belongs to the
# same objective that is differentiated numerically below; the gradient from
# the call above was computed with reg=0.00001 and would not match exactly.
_, grad = linear_svm.svm_loss_native(W, X_dev, y_dev, 0.0)
f = lambda w: linear_svm.svm_loss_native(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Gradient check with L2 regularisation switched on (reg=1e2).
loss, grad = linear_svm.svm_loss_native(W, X_dev, y_dev, 1e2)
f = lambda w: linear_svm.svm_loss_native(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad)

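#Question 1: Sometimes the numerical and the analytic gradient in a gradient check do not match exactly.
#What could cause this? Is it a reason for concern? What is an example where a gradient check fails?
#Hint: strictly speaking, the SVM loss function is not differentiable.
#Answer: The loss may be non-differentiable at some points. For example, the ReLU function f(x)=max(0,x)
#is not differentiable at x=0.
#The numerical gradient uses the centered difference \frac{df(x)}{dx}=\frac{f(x+h)-f(x-h)}{2h}.
#At x=h/2 this gives \frac{f(3h/2)-f(-h/2)}{2h}=\frac{3h/2-0}{2h}=3/4, whereas the analytic gradient there is f'(h/2)=1.
#So the two gradients drift apart in the neighbourhood of x=0, where the interval [x-h, x+h] straddles the kink.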

# Time the two loss implementations on the development set.
start = time.time()
loss_native, grad_native = linear_svm.svm_loss_native(W, X_dev, y_dev, 0.00001)
elapsed = time.time() - start
print('Native loss: %e computed in %fs' % (loss_native, elapsed))

start = time.time()
loss_vectorized, _ = linear_svm.svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
elapsed = time.time() - start
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, elapsed))

# Print the difference between the two loss values.
loss_gap = loss_native - loss_vectorized
print('difference: %f' % loss_gap)
# Train a linear classifier with SGD and time the run.
from cs231n.classifiers import linear_classifier
svm = linear_classifier.LinearClassifier()
sgd_settings = dict(learning_rate=1e-7, reg=5e4, num_iters=1500, verbose=True)
tic = time.time()
loss_hist = svm.train(X_train, y_train, **sgd_settings)
toc = time.time()
duration = toc - tic
print('That took %fs' % duration)

# Plot the loss history returned by training.
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Accuracy on the training set, then on the validation set.
y_train_pred = svm.predict(X_train)
train_hit_rate = np.mean(y_train == y_train_pred)
print('training accuracy: %f' % train_hit_rate)
y_val_pred = svm.predict(X_val)
val_hit_rate = np.mean(y_val == y_val_pred)
print('validation accuracy: %f' % val_hit_rate)

# Hyperparameter search over learning rate and regularisation strength.
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# Two bands of strengths: 0.7e4 .. 1.2e4 followed by 1.7e4 .. 2.2e4.
regularization_strengths = [
    (base + step * 0.1) * 1e4
    for base in (1, 2)
    for step in range(-3, 3)
]
# results maps (learning_rate, regularization_strength) to
# (train_accuracy, val_accuracy).
results = {}
best_val = -1
best_svm = None

for rs in regularization_strengths:
    for lr in learning_rates:
        # Train a fresh classifier for this combination.
        svm = linear_classifier.LinearClassifier()
        loss_hist = svm.train(X_train, y_train, lr, rs, num_iters=3000)

        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        train_accuracy = np.mean(y_train == y_train_pred)
        val_accuracy = np.mean(y_val == y_val_pred)
        results[(lr, rs)] = (train_accuracy, val_accuracy)

        # Remember the classifier with the highest validation accuracy so far.
        if val_accuracy > best_val:
            best_val, best_svm = val_accuracy, svm

# Print every combination, ordered by (learning rate, regularisation strength).
for (lr, reg), (train_accuracy, val_accuracy) in sorted(results.items()):
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))
print('best validation accuracy achieved during cross-validation: %f' % best_val)

# Visualise the cross-validation results as two scatter plots: training
# accuracy on top, validation accuracy below.
import math

# results is keyed by (learning_rate, regularization_strength); both axes are
# shown on a log10 scale.
x_scatter = [math.log10(x[0]) for x in results]
y_scatter = [math.log10(x[1]) for x in results]
marker_size = 100  # size of each scatter point

# (subplot position, index into the (train_accuracy, val_accuracy) tuple, title)
panels = (
    (1, 0, 'CIFAR-10 training accuracy'),
    # This panel used to be titled "training accuracy" although it shows the
    # validation accuracy.
    (2, 1, 'CIFAR-10 validation accuracy'),
)
for position, metric, panel_title in panels:
    # Colour encodes the accuracy reached by each hyperparameter combination.
    colors = [results[x][metric] for x in results]
    plt.subplot(2, 1, position)
    plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
    plt.colorbar()
    plt.xlabel('log learning rate')
    # The y axis is the log of the regularisation strength, not an accuracy.
    plt.ylabel('log regularization strength')
    plt.title(panel_title)
plt.show()

# Evaluate the best classifier found during the search on the test set.
y_test_pred = best_svm.predict(X_test)
test_hits = (y_test == y_test_pred)
test_accuracy = np.mean(test_hits)
print('linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

# Visualise the weights learned for each class.
# The last row of W is dropped (it is the bias); the remaining rows are viewed
# as one 32x32x3 image per class.
w = best_svm.W[:-1, :].reshape(32, 32, 3, 10)
w_min, w_max = w.min(), w.max()
w_span = w_max - w_min
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for i, class_name in enumerate(classes):
    plt.subplot(2, 5, i + 1)
    # Rescale the weights to the range [0, 255] so they can be shown as an image.
    class_weights = np.squeeze(w[..., i])  # squeeze drops any length-1 axes
    wimg = 255.0 * (class_weights - w_min) / w_span
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')  # hide the axes
    plt.title(class_name)

plt.show()
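#Problem 2: Describe what the visualized SVM weights look like, and briefly explain why they look that way.
#The visualized SVM weights look like an averaged outline (template) of the objects of the corresponding class,
#which is exactly the response they are expected to give. A score is the inner product of a sample and the weights,
#so to obtain a higher score for the correct label the corresponding weights should be more parallel to the sample.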





